{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8ca63d10",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e637115a",
   "metadata": {},
   "outputs": [],
   "source": [
    "b_a = pd.read_excel(r'F:\\work\\2023.08.26DK提取路径\\相关数据\\1B-A.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c808bef1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B20</td>\n",
       "      <td>2016.289</td>\n",
       "      <td>A5</td>\n",
       "      <td>2016.315</td>\n",
       "      <td>71.145107</td>\n",
       "      <td>0.026</td>\n",
       "      <td>0.005483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B5</td>\n",
       "      <td>2015.128</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>69.033516</td>\n",
       "      <td>0.309</td>\n",
       "      <td>0.005320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B26</td>\n",
       "      <td>2013.829</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>60.843124</td>\n",
       "      <td>1.608</td>\n",
       "      <td>0.004689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A0</td>\n",
       "      <td>2016.318</td>\n",
       "      <td>49.546486</td>\n",
       "      <td>2.226</td>\n",
       "      <td>0.003818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B18</td>\n",
       "      <td>2014.376</td>\n",
       "      <td>A0</td>\n",
       "      <td>2016.318</td>\n",
       "      <td>48.733184</td>\n",
       "      <td>1.942</td>\n",
       "      <td>0.003756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>255</th>\n",
       "      <td>B23</td>\n",
       "      <td>2012.457</td>\n",
       "      <td>A9</td>\n",
       "      <td>2017.567</td>\n",
       "      <td>19.463137</td>\n",
       "      <td>5.110</td>\n",
       "      <td>0.001500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>256</th>\n",
       "      <td>B3</td>\n",
       "      <td>2013.702</td>\n",
       "      <td>A12</td>\n",
       "      <td>2018.132</td>\n",
       "      <td>19.379900</td>\n",
       "      <td>4.430</td>\n",
       "      <td>0.001494</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>257</th>\n",
       "      <td>B15</td>\n",
       "      <td>2013.858</td>\n",
       "      <td>A21</td>\n",
       "      <td>2020.103</td>\n",
       "      <td>19.374468</td>\n",
       "      <td>6.245</td>\n",
       "      <td>0.001493</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>B7</td>\n",
       "      <td>2013.570</td>\n",
       "      <td>A26</td>\n",
       "      <td>2018.645</td>\n",
       "      <td>19.372468</td>\n",
       "      <td>5.075</td>\n",
       "      <td>0.001493</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A10</td>\n",
       "      <td>2016.594</td>\n",
       "      <td>19.354757</td>\n",
       "      <td>6.117</td>\n",
       "      <td>0.001492</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>260 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1\n",
       "0    B20  2016.289    A5  2016.315  71.145107  0.026  0.005483\n",
       "1     B5  2015.128   A11  2015.437  69.033516  0.309  0.005320\n",
       "2    B26  2013.829   A11  2015.437  60.843124  1.608  0.004689\n",
       "3     B0  2014.092    A0  2016.318  49.546486  2.226  0.003818\n",
       "4    B18  2014.376    A0  2016.318  48.733184  1.942  0.003756\n",
       "..   ...       ...   ...       ...        ...    ...       ...\n",
       "255  B23  2012.457    A9  2017.567  19.463137  5.110  0.001500\n",
       "256   B3  2013.702   A12  2018.132  19.379900  4.430  0.001494\n",
       "257  B15  2013.858   A21  2020.103  19.374468  6.245  0.001493\n",
       "258   B7  2013.570   A26  2018.645  19.372468  5.075  0.001493\n",
       "259  B25  2010.477   A10  2016.594  19.354757  6.117  0.001492\n",
       "\n",
       "[260 rows x 7 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b_a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "40a3c5db",
   "metadata": {},
   "outputs": [],
   "source": [
    "a_t = pd.read_excel(r'F:\\work\\2023.08.26DK提取路径\\相关数据\\2A-T.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b23b9e50",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>年份3</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T4</td>\n",
       "      <td>2016.927</td>\n",
       "      <td>103.582626</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.007983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T31</td>\n",
       "      <td>2011.698</td>\n",
       "      <td>89.223866</td>\n",
       "      <td>0.636</td>\n",
       "      <td>0.006876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A6</td>\n",
       "      <td>2014.096</td>\n",
       "      <td>T35</td>\n",
       "      <td>2014.258</td>\n",
       "      <td>69.337287</td>\n",
       "      <td>0.162</td>\n",
       "      <td>0.005343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>T27</td>\n",
       "      <td>2015.581</td>\n",
       "      <td>66.636650</td>\n",
       "      <td>0.144</td>\n",
       "      <td>0.005135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175</th>\n",
       "      <td>A26</td>\n",
       "      <td>2018.645</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.981221</td>\n",
       "      <td>0.618</td>\n",
       "      <td>0.001771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>A18</td>\n",
       "      <td>2015.758</td>\n",
       "      <td>T2</td>\n",
       "      <td>2017.285</td>\n",
       "      <td>22.860073</td>\n",
       "      <td>1.527</td>\n",
       "      <td>0.001762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>177</th>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>T10</td>\n",
       "      <td>2017.672</td>\n",
       "      <td>22.592894</td>\n",
       "      <td>2.235</td>\n",
       "      <td>0.001741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>A12</td>\n",
       "      <td>2018.132</td>\n",
       "      <td>T7</td>\n",
       "      <td>2018.670</td>\n",
       "      <td>22.505807</td>\n",
       "      <td>0.538</td>\n",
       "      <td>0.001734</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>180 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    应用主题2       年份2 技术主题1       年份3        绝对值2   年份差2   weight2\n",
       "0     A20  2016.079    T4  2016.927  103.582626  0.848  0.007983\n",
       "1      A1  2011.062   T31  2011.698   89.223866  0.636  0.006876\n",
       "2     A25  2015.594   T11  2016.133   71.858611  0.539  0.005538\n",
       "3      A6  2014.096   T35  2014.258   69.337287  0.162  0.005343\n",
       "4     A11  2015.437   T27  2015.581   66.636650  0.144  0.005135\n",
       "..    ...       ...   ...       ...         ...    ...       ...\n",
       "175   A26  2018.645   T16  2019.263   22.981221  0.618  0.001771\n",
       "176   A18  2015.758    T2  2017.285   22.860073  1.527  0.001762\n",
       "177   A11  2015.437   T10  2017.672   22.592894  2.235  0.001741\n",
       "178   A12  2018.132    T7  2018.670   22.505807  0.538  0.001734\n",
       "179   A19  2018.807   T16  2019.263   22.472464  0.456  0.001732\n",
       "\n",
       "[180 rows x 7 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a_t"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6ff1abb5",
   "metadata": {},
   "source": [
    "## 将 `b_a` 对齐到 `a_t`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "877235ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build every B -> A -> T path: pair each row of a_t (an A-T link) with all rows of\n",
    "# b_a (B-A links) whose '应用主题1' equals that row's '应用主题2'.\n",
    "# NOTE(review): a plain DataFrame.merge would rename the shared '年份2' column with\n",
    "# suffixes; this loop keeps both copies under the same name, so it is left as a loop.\n",
    "idex_ls = []  # NOTE(review): never used in this cell; kept in case a later cell reads it\n",
    "df_ls = []  # one side-by-side block per a_t row, stacked vertically at the end\n",
    "\n",
    "for idx, a in enumerate(a_t['应用主题2'].values):\n",
    "    df_ba = b_a[b_a['应用主题1'] == a]  # B-A rows that end in this application topic\n",
    "    df_at = a_t.iloc[[idx] * len(df_ba), :]  # repeat the current A-T row once per matching B-A row\n",
    "    # Reset both indexes so the axis=1 concat lines rows up by position, not by original labels.\n",
    "    df = pd.concat([df_ba.reset_index(drop=True), df_at.reset_index(drop=True)], axis=1)\n",
    "    df_ls.append(df)\n",
    "\n",
    "# pd.concat raises ValueError on an empty list, so fall back to an empty frame with the\n",
    "# combined column layout when a_t has no rows. Each block keeps its own 0-based index,\n",
    "# so all_df has repeated index labels until reset_index is called on it.\n",
    "all_df = pd.concat(df_ls) if df_ls else pd.DataFrame(columns=[*b_a.columns, *a_t.columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7980ebbd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1444, 14)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9e3c23b7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>年份3</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>21.032535</td>\n",
       "      <td>4.714</td>\n",
       "      <td>0.001621</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T4</td>\n",
       "      <td>2016.927</td>\n",
       "      <td>103.582626</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.007983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T31</td>\n",
       "      <td>2011.698</td>\n",
       "      <td>89.223866</td>\n",
       "      <td>0.636</td>\n",
       "      <td>0.006876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B10</td>\n",
       "      <td>2011.645</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>36.210261</td>\n",
       "      <td>3.949</td>\n",
       "      <td>0.002791</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>27.359697</td>\n",
       "      <td>4.229</td>\n",
       "      <td>0.002108</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>23.639508</td>\n",
       "      <td>1.502</td>\n",
       "      <td>0.001822</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B13</td>\n",
       "      <td>2014.964</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>24.760734</td>\n",
       "      <td>3.843</td>\n",
       "      <td>0.001908</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B1</td>\n",
       "      <td>2014.468</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.985880</td>\n",
       "      <td>4.339</td>\n",
       "      <td>0.001848</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.706140</td>\n",
       "      <td>4.715</td>\n",
       "      <td>0.001827</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>B11</td>\n",
       "      <td>2016.364</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.993360</td>\n",
       "      <td>2.443</td>\n",
       "      <td>0.001695</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>B8</td>\n",
       "      <td>2013.253</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.071649</td>\n",
       "      <td>5.554</td>\n",
       "      <td>0.001624</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1444 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1 应用主题2       年份2  \\\n",
       "0   B21  2011.365   A20  2016.079  21.032535  4.714  0.001621   A20  2016.079   \n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "0   B10  2011.645   A25  2015.594  36.210261  3.949  0.002791   A25  2015.594   \n",
       "1   B21  2011.365   A25  2015.594  27.359697  4.229  0.002108   A25  2015.594   \n",
       "2    B0  2014.092   A25  2015.594  23.639508  1.502  0.001822   A25  2015.594   \n",
       "..  ...       ...   ...       ...        ...    ...       ...   ...       ...   \n",
       "3   B13  2014.964   A19  2018.807  24.760734  3.843  0.001908   A19  2018.807   \n",
       "4    B1  2014.468   A19  2018.807  23.985880  4.339  0.001848   A19  2018.807   \n",
       "5    B0  2014.092   A19  2018.807  23.706140  4.715  0.001827   A19  2018.807   \n",
       "6   B11  2016.364   A19  2018.807  21.993360  2.443  0.001695   A19  2018.807   \n",
       "7    B8  2013.253   A19  2018.807  21.071649  5.554  0.001624   A19  2018.807   \n",
       "\n",
       "   技术主题1       年份3        绝对值2   年份差2   weight2  \n",
       "0     T4  2016.927  103.582626  0.848  0.007983  \n",
       "0    T31  2011.698   89.223866  0.636  0.006876  \n",
       "0    T11  2016.133   71.858611  0.539  0.005538  \n",
       "1    T11  2016.133   71.858611  0.539  0.005538  \n",
       "2    T11  2016.133   71.858611  0.539  0.005538  \n",
       "..   ...       ...         ...    ...       ...  \n",
       "3    T16  2019.263   22.472464  0.456  0.001732  \n",
       "4    T16  2019.263   22.472464  0.456  0.001732  \n",
       "5    T16  2019.263   22.472464  0.456  0.001732  \n",
       "6    T16  2019.263   22.472464  0.456  0.001732  \n",
       "7    T16  2019.263   22.472464  0.456  0.001732  \n",
       "\n",
       "[1444 rows x 14 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "97379260",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_df.to_excel(r'F:\\work\\2023.08.26DK提取路径\\相关数据\\思路2\\全三阶段路径_B—A—T.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e96c7607",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1444, 14)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.drop_duplicates().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "86ed3222",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_bat = all_df.reset_index(drop = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "1751f2a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>年份3</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>21.032535</td>\n",
       "      <td>4.714</td>\n",
       "      <td>0.001621</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T4</td>\n",
       "      <td>2016.927</td>\n",
       "      <td>103.582626</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.007983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T31</td>\n",
       "      <td>2011.698</td>\n",
       "      <td>89.223866</td>\n",
       "      <td>0.636</td>\n",
       "      <td>0.006876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B10</td>\n",
       "      <td>2011.645</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>36.210261</td>\n",
       "      <td>3.949</td>\n",
       "      <td>0.002791</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>27.359697</td>\n",
       "      <td>4.229</td>\n",
       "      <td>0.002108</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>23.639508</td>\n",
       "      <td>1.502</td>\n",
       "      <td>0.001822</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1439</th>\n",
       "      <td>B13</td>\n",
       "      <td>2014.964</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>24.760734</td>\n",
       "      <td>3.843</td>\n",
       "      <td>0.001908</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1440</th>\n",
       "      <td>B1</td>\n",
       "      <td>2014.468</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.985880</td>\n",
       "      <td>4.339</td>\n",
       "      <td>0.001848</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.706140</td>\n",
       "      <td>4.715</td>\n",
       "      <td>0.001827</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1442</th>\n",
       "      <td>B11</td>\n",
       "      <td>2016.364</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.993360</td>\n",
       "      <td>2.443</td>\n",
       "      <td>0.001695</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1443</th>\n",
       "      <td>B8</td>\n",
       "      <td>2013.253</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.071649</td>\n",
       "      <td>5.554</td>\n",
       "      <td>0.001624</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1444 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1 应用主题2  \\\n",
       "0     B21  2011.365   A20  2016.079  21.032535  4.714  0.001621   A20   \n",
       "1     B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1   \n",
       "2     B10  2011.645   A25  2015.594  36.210261  3.949  0.002791   A25   \n",
       "3     B21  2011.365   A25  2015.594  27.359697  4.229  0.002108   A25   \n",
       "4      B0  2014.092   A25  2015.594  23.639508  1.502  0.001822   A25   \n",
       "...   ...       ...   ...       ...        ...    ...       ...   ...   \n",
       "1439  B13  2014.964   A19  2018.807  24.760734  3.843  0.001908   A19   \n",
       "1440   B1  2014.468   A19  2018.807  23.985880  4.339  0.001848   A19   \n",
       "1441   B0  2014.092   A19  2018.807  23.706140  4.715  0.001827   A19   \n",
       "1442  B11  2016.364   A19  2018.807  21.993360  2.443  0.001695   A19   \n",
       "1443   B8  2013.253   A19  2018.807  21.071649  5.554  0.001624   A19   \n",
       "\n",
       "           年份2 技术主题1       年份3        绝对值2   年份差2   weight2  \n",
       "0     2016.079    T4  2016.927  103.582626  0.848  0.007983  \n",
       "1     2011.062   T31  2011.698   89.223866  0.636  0.006876  \n",
       "2     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "3     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "4     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "...        ...   ...       ...         ...    ...       ...  \n",
       "1439  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1440  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1441  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1442  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1443  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "\n",
       "[1444 rows x 14 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bat"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7894bb1",
   "metadata": {},
   "source": [
    "## 将bat对齐到ti\n",
    "- 得到全路径"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "3c261f55",
   "metadata": {},
   "outputs": [],
   "source": [
    "t_i = pd.read_excel(r'F:\\work\\2023.08.26DK提取路径\\相关数据\\3T-I.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e52746a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>技术主题2</th>\n",
       "      <th>年份4</th>\n",
       "      <th>产业主题1</th>\n",
       "      <th>年份5</th>\n",
       "      <th>绝对值3</th>\n",
       "      <th>年份差3</th>\n",
       "      <th>weight3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I34</td>\n",
       "      <td>2017.743842</td>\n",
       "      <td>1</td>\n",
       "      <td>4.617842</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>T28</td>\n",
       "      <td>2014.269</td>\n",
       "      <td>I12</td>\n",
       "      <td>2018.170213</td>\n",
       "      <td>1</td>\n",
       "      <td>3.901213</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I25</td>\n",
       "      <td>2015.513109</td>\n",
       "      <td>1</td>\n",
       "      <td>2.387109</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>T9</td>\n",
       "      <td>2013.711</td>\n",
       "      <td>I25</td>\n",
       "      <td>2015.513109</td>\n",
       "      <td>1</td>\n",
       "      <td>1.802109</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I18</td>\n",
       "      <td>2014.564103</td>\n",
       "      <td>1</td>\n",
       "      <td>1.438103</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>T35</td>\n",
       "      <td>2014.258</td>\n",
       "      <td>I25</td>\n",
       "      <td>2015.513109</td>\n",
       "      <td>1</td>\n",
       "      <td>1.255109</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I21</td>\n",
       "      <td>2017.039216</td>\n",
       "      <td>1</td>\n",
       "      <td>0.612216</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>T2</td>\n",
       "      <td>2017.285</td>\n",
       "      <td>I23</td>\n",
       "      <td>2017.411940</td>\n",
       "      <td>1</td>\n",
       "      <td>0.126940</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I29</td>\n",
       "      <td>2018.926829</td>\n",
       "      <td>2</td>\n",
       "      <td>2.499829</td>\n",
       "      <td>0.000154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>T1</td>\n",
       "      <td>2015.463</td>\n",
       "      <td>I23</td>\n",
       "      <td>2017.411940</td>\n",
       "      <td>2</td>\n",
       "      <td>1.948940</td>\n",
       "      <td>0.000154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>I13</td>\n",
       "      <td>2017.307305</td>\n",
       "      <td>2</td>\n",
       "      <td>1.174305</td>\n",
       "      <td>0.000154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I20</td>\n",
       "      <td>2017.076923</td>\n",
       "      <td>2</td>\n",
       "      <td>0.649923</td>\n",
       "      <td>0.000154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>I12</td>\n",
       "      <td>2018.170213</td>\n",
       "      <td>3</td>\n",
       "      <td>2.037213</td>\n",
       "      <td>0.000231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I32</td>\n",
       "      <td>2018.838710</td>\n",
       "      <td>4</td>\n",
       "      <td>2.411710</td>\n",
       "      <td>0.000308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I5</td>\n",
       "      <td>2016.696970</td>\n",
       "      <td>6</td>\n",
       "      <td>0.269970</td>\n",
       "      <td>0.000462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I2</td>\n",
       "      <td>2017.535354</td>\n",
       "      <td>13</td>\n",
       "      <td>1.108354</td>\n",
       "      <td>0.001002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>17</td>\n",
       "      <td>1.638429</td>\n",
       "      <td>0.001310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I28</td>\n",
       "      <td>2017.674797</td>\n",
       "      <td>17</td>\n",
       "      <td>1.247797</td>\n",
       "      <td>0.001310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I11</td>\n",
       "      <td>2017.209302</td>\n",
       "      <td>18</td>\n",
       "      <td>0.782302</td>\n",
       "      <td>0.001387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I12</td>\n",
       "      <td>2018.170213</td>\n",
       "      <td>25</td>\n",
       "      <td>1.743213</td>\n",
       "      <td>0.001927</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I23</td>\n",
       "      <td>2017.411940</td>\n",
       "      <td>36</td>\n",
       "      <td>0.984940</td>\n",
       "      <td>0.002774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I13</td>\n",
       "      <td>2017.307305</td>\n",
       "      <td>39</td>\n",
       "      <td>0.880305</td>\n",
       "      <td>0.003006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I34</td>\n",
       "      <td>2017.743842</td>\n",
       "      <td>50</td>\n",
       "      <td>1.316842</td>\n",
       "      <td>0.003853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I30</td>\n",
       "      <td>2017.257576</td>\n",
       "      <td>66</td>\n",
       "      <td>0.830576</td>\n",
       "      <td>0.005086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   技术主题2       年份4 产业主题1          年份5  绝对值3      年份差3   weight3\n",
       "0    T17  2013.126   I34  2017.743842     1  4.617842  0.000077\n",
       "1    T28  2014.269   I12  2018.170213     1  3.901213  0.000077\n",
       "2    T17  2013.126   I25  2015.513109     1  2.387109  0.000077\n",
       "3     T9  2013.711   I25  2015.513109     1  1.802109  0.000077\n",
       "4    T17  2013.126   I18  2014.564103     1  1.438103  0.000077\n",
       "5    T35  2014.258   I25  2015.513109     1  1.255109  0.000077\n",
       "6     T0  2016.427   I21  2017.039216     1  0.612216  0.000077\n",
       "7     T2  2017.285   I23  2017.411940     1  0.126940  0.000077\n",
       "8     T0  2016.427   I29  2018.926829     2  2.499829  0.000154\n",
       "9     T1  2015.463   I23  2017.411940     2  1.948940  0.000154\n",
       "10   T11  2016.133   I13  2017.307305     2  1.174305  0.000154\n",
       "11    T0  2016.427   I20  2017.076923     2  0.649923  0.000154\n",
       "12   T11  2016.133   I12  2018.170213     3  2.037213  0.000231\n",
       "13    T0  2016.427   I32  2018.838710     4  2.411710  0.000308\n",
       "14    T0  2016.427    I5  2016.696970     6  0.269970  0.000462\n",
       "15    T0  2016.427    I2  2017.535354    13  1.108354  0.001002\n",
       "16   T11  2016.133   I14  2017.771429    17  1.638429  0.001310\n",
       "17    T0  2016.427   I28  2017.674797    17  1.247797  0.001310\n",
       "18    T0  2016.427   I11  2017.209302    18  0.782302  0.001387\n",
       "19    T0  2016.427   I12  2018.170213    25  1.743213  0.001927\n",
       "20    T0  2016.427   I23  2017.411940    36  0.984940  0.002774\n",
       "21    T0  2016.427   I13  2017.307305    39  0.880305  0.003006\n",
       "22    T0  2016.427   I34  2017.743842    50  1.316842  0.003853\n",
       "23    T0  2016.427   I30  2017.257576    66  0.830576  0.005086\n",
       "24    T0  2016.427   I14  2017.771429   106  1.344429  0.008169"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "154d47cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "idex_ls = []\n",
    "df_ls = []# 最终的纵向合并\n",
    "\n",
    "for idx,t in enumerate(t_i['技术主题2'].values[:]):\n",
    "#     print(idx)\n",
    "#     print(a)\n",
    "    df_bat2 = df_bat[df_bat['技术主题1'] == t]  #取得与at对应的ba\n",
    "    df_ti = t_i.iloc[[idx for i in range(0,df_bat2.shape[0])],:]  # 取得与df_ba同样行的a_t\n",
    "    df = pd.concat([df_bat2.reset_index(drop = True),df_ti.reset_index(drop = True)],axis=1) # 横向拼接上边两个表\n",
    "    df_ls.append(df)\n",
    "all_df = pd.concat(df_ls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "cdd332fd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>...</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "      <th>技术主题2</th>\n",
       "      <th>年份4</th>\n",
       "      <th>产业主题1</th>\n",
       "      <th>年份5</th>\n",
       "      <th>绝对值3</th>\n",
       "      <th>年份差3</th>\n",
       "      <th>weight3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T17</td>\n",
       "      <td>...</td>\n",
       "      <td>37.122055</td>\n",
       "      <td>2.064</td>\n",
       "      <td>0.002861</td>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I34</td>\n",
       "      <td>2017.743842</td>\n",
       "      <td>1</td>\n",
       "      <td>4.617842</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T28</td>\n",
       "      <td>...</td>\n",
       "      <td>42.615256</td>\n",
       "      <td>3.207</td>\n",
       "      <td>0.003284</td>\n",
       "      <td>T28</td>\n",
       "      <td>2014.269</td>\n",
       "      <td>I12</td>\n",
       "      <td>2018.170213</td>\n",
       "      <td>1</td>\n",
       "      <td>3.901213</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T17</td>\n",
       "      <td>...</td>\n",
       "      <td>37.122055</td>\n",
       "      <td>2.064</td>\n",
       "      <td>0.002861</td>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I25</td>\n",
       "      <td>2015.513109</td>\n",
       "      <td>1</td>\n",
       "      <td>2.387109</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T9</td>\n",
       "      <td>...</td>\n",
       "      <td>36.255734</td>\n",
       "      <td>2.649</td>\n",
       "      <td>0.002794</td>\n",
       "      <td>T9</td>\n",
       "      <td>2013.711</td>\n",
       "      <td>I25</td>\n",
       "      <td>2015.513109</td>\n",
       "      <td>1</td>\n",
       "      <td>1.802109</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T17</td>\n",
       "      <td>...</td>\n",
       "      <td>37.122055</td>\n",
       "      <td>2.064</td>\n",
       "      <td>0.002861</td>\n",
       "      <td>T17</td>\n",
       "      <td>2013.126</td>\n",
       "      <td>I18</td>\n",
       "      <td>2014.564103</td>\n",
       "      <td>1</td>\n",
       "      <td>1.438103</td>\n",
       "      <td>0.000077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>B23</td>\n",
       "      <td>2012.457</td>\n",
       "      <td>A27</td>\n",
       "      <td>2015.081</td>\n",
       "      <td>22.079115</td>\n",
       "      <td>2.624</td>\n",
       "      <td>0.001702</td>\n",
       "      <td>A27</td>\n",
       "      <td>2015.081</td>\n",
       "      <td>T0</td>\n",
       "      <td>...</td>\n",
       "      <td>31.368010</td>\n",
       "      <td>1.346</td>\n",
       "      <td>0.002417</td>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>B19</td>\n",
       "      <td>2011.187</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>44.381143</td>\n",
       "      <td>3.377</td>\n",
       "      <td>0.003420</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>T0</td>\n",
       "      <td>...</td>\n",
       "      <td>27.749592</td>\n",
       "      <td>1.863</td>\n",
       "      <td>0.002139</td>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>27.996000</td>\n",
       "      <td>0.472</td>\n",
       "      <td>0.002158</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>T0</td>\n",
       "      <td>...</td>\n",
       "      <td>27.749592</td>\n",
       "      <td>1.863</td>\n",
       "      <td>0.002139</td>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>B7</td>\n",
       "      <td>2013.570</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>24.331707</td>\n",
       "      <td>0.994</td>\n",
       "      <td>0.001875</td>\n",
       "      <td>A17</td>\n",
       "      <td>2014.564</td>\n",
       "      <td>T0</td>\n",
       "      <td>...</td>\n",
       "      <td>27.749592</td>\n",
       "      <td>1.863</td>\n",
       "      <td>0.002139</td>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>21.032535</td>\n",
       "      <td>4.714</td>\n",
       "      <td>0.001621</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T0</td>\n",
       "      <td>...</td>\n",
       "      <td>26.783024</td>\n",
       "      <td>0.348</td>\n",
       "      <td>0.002064</td>\n",
       "      <td>T0</td>\n",
       "      <td>2016.427</td>\n",
       "      <td>I14</td>\n",
       "      <td>2017.771429</td>\n",
       "      <td>106</td>\n",
       "      <td>1.344429</td>\n",
       "      <td>0.008169</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1008 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1 应用主题2       年份2  \\\n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "0   B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1  2011.062   \n",
       "..  ...       ...   ...       ...        ...    ...       ...   ...       ...   \n",
       "56  B23  2012.457   A27  2015.081  22.079115  2.624  0.001702   A27  2015.081   \n",
       "57  B19  2011.187   A17  2014.564  44.381143  3.377  0.003420   A17  2014.564   \n",
       "58   B0  2014.092   A17  2014.564  27.996000  0.472  0.002158   A17  2014.564   \n",
       "59   B7  2013.570   A17  2014.564  24.331707  0.994  0.001875   A17  2014.564   \n",
       "60  B21  2011.365   A20  2016.079  21.032535  4.714  0.001621   A20  2016.079   \n",
       "\n",
       "   技术主题1  ...       绝对值2   年份差2   weight2  技术主题2       年份4  产业主题1  \\\n",
       "0    T17  ...  37.122055  2.064  0.002861    T17  2013.126    I34   \n",
       "0    T28  ...  42.615256  3.207  0.003284    T28  2014.269    I12   \n",
       "0    T17  ...  37.122055  2.064  0.002861    T17  2013.126    I25   \n",
       "0     T9  ...  36.255734  2.649  0.002794     T9  2013.711    I25   \n",
       "0    T17  ...  37.122055  2.064  0.002861    T17  2013.126    I18   \n",
       "..   ...  ...        ...    ...       ...    ...       ...    ...   \n",
       "56    T0  ...  31.368010  1.346  0.002417     T0  2016.427    I14   \n",
       "57    T0  ...  27.749592  1.863  0.002139     T0  2016.427    I14   \n",
       "58    T0  ...  27.749592  1.863  0.002139     T0  2016.427    I14   \n",
       "59    T0  ...  27.749592  1.863  0.002139     T0  2016.427    I14   \n",
       "60    T0  ...  26.783024  0.348  0.002064     T0  2016.427    I14   \n",
       "\n",
       "            年份5  绝对值3      年份差3   weight3  \n",
       "0   2017.743842     1  4.617842  0.000077  \n",
       "0   2018.170213     1  3.901213  0.000077  \n",
       "0   2015.513109     1  2.387109  0.000077  \n",
       "0   2015.513109     1  1.802109  0.000077  \n",
       "0   2014.564103     1  1.438103  0.000077  \n",
       "..          ...   ...       ...       ...  \n",
       "56  2017.771429   106  1.344429  0.008169  \n",
       "57  2017.771429   106  1.344429  0.008169  \n",
       "58  2017.771429   106  1.344429  0.008169  \n",
       "59  2017.771429   106  1.344429  0.008169  \n",
       "60  2017.771429   106  1.344429  0.008169  \n",
       "\n",
       "[1008 rows x 21 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "988a4c73",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_bati = all_df.reset_index(drop = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "4c5f6270",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1008, 21)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bati.drop_duplicates().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "6b6e3473",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['I34', 'I12', 'I25', 'I18', 'I21', 'I23', 'I29', 'I13', 'I20',\n",
       "       'I32', 'I5', 'I2', 'I14', 'I28', 'I11', 'I30'], dtype=object)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bati['产业主题1'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "112aa838",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_bati.to_excel(r'F:\\work\\2023.08.26DK提取路径\\相关数据\\思路2\\全四阶段路径_BATI.xlsx',index = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2d86f9f",
   "metadata": {},
   "source": [
    "## 判断三路径\n",
    "- 技术主题1 不在t_i中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5c8da1b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>年份3</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>21.032535</td>\n",
       "      <td>4.714</td>\n",
       "      <td>0.001621</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T4</td>\n",
       "      <td>2016.927</td>\n",
       "      <td>103.582626</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.007983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T31</td>\n",
       "      <td>2011.698</td>\n",
       "      <td>89.223866</td>\n",
       "      <td>0.636</td>\n",
       "      <td>0.006876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B10</td>\n",
       "      <td>2011.645</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>36.210261</td>\n",
       "      <td>3.949</td>\n",
       "      <td>0.002791</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>27.359697</td>\n",
       "      <td>4.229</td>\n",
       "      <td>0.002108</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>23.639508</td>\n",
       "      <td>1.502</td>\n",
       "      <td>0.001822</td>\n",
       "      <td>A25</td>\n",
       "      <td>2015.594</td>\n",
       "      <td>T11</td>\n",
       "      <td>2016.133</td>\n",
       "      <td>71.858611</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.005538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1439</th>\n",
       "      <td>B13</td>\n",
       "      <td>2014.964</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>24.760734</td>\n",
       "      <td>3.843</td>\n",
       "      <td>0.001908</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1440</th>\n",
       "      <td>B1</td>\n",
       "      <td>2014.468</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.985880</td>\n",
       "      <td>4.339</td>\n",
       "      <td>0.001848</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.706140</td>\n",
       "      <td>4.715</td>\n",
       "      <td>0.001827</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1442</th>\n",
       "      <td>B11</td>\n",
       "      <td>2016.364</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.993360</td>\n",
       "      <td>2.443</td>\n",
       "      <td>0.001695</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1443</th>\n",
       "      <td>B8</td>\n",
       "      <td>2013.253</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.071649</td>\n",
       "      <td>5.554</td>\n",
       "      <td>0.001624</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1444 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1 应用主题2  \\\n",
       "0     B21  2011.365   A20  2016.079  21.032535  4.714  0.001621   A20   \n",
       "1     B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1   \n",
       "2     B10  2011.645   A25  2015.594  36.210261  3.949  0.002791   A25   \n",
       "3     B21  2011.365   A25  2015.594  27.359697  4.229  0.002108   A25   \n",
       "4      B0  2014.092   A25  2015.594  23.639508  1.502  0.001822   A25   \n",
       "...   ...       ...   ...       ...        ...    ...       ...   ...   \n",
       "1439  B13  2014.964   A19  2018.807  24.760734  3.843  0.001908   A19   \n",
       "1440   B1  2014.468   A19  2018.807  23.985880  4.339  0.001848   A19   \n",
       "1441   B0  2014.092   A19  2018.807  23.706140  4.715  0.001827   A19   \n",
       "1442  B11  2016.364   A19  2018.807  21.993360  2.443  0.001695   A19   \n",
       "1443   B8  2013.253   A19  2018.807  21.071649  5.554  0.001624   A19   \n",
       "\n",
       "           年份2 技术主题1       年份3        绝对值2   年份差2   weight2  \n",
       "0     2016.079    T4  2016.927  103.582626  0.848  0.007983  \n",
       "1     2011.062   T31  2011.698   89.223866  0.636  0.006876  \n",
       "2     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "3     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "4     2015.594   T11  2016.133   71.858611  0.539  0.005538  \n",
       "...        ...   ...       ...         ...    ...       ...  \n",
       "1439  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1440  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1441  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1442  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1443  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "\n",
       "[1444 rows x 14 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "05380ba0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['T17', 'T28', 'T9', 'T35', 'T0', 'T2', 'T1', 'T11'], dtype=object)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_i['技术主题2'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "0b733677",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flag rows of df_bat whose 技术主题1 also occurs in t_i['技术主题2'].\n",
    "# Index labels are collected (not enumerate() positions) because\n",
    "# DataFrame.drop(index=...) matches labels; positions only coincide with\n",
    "# labels while df_bat keeps a default 0..n-1 index.\n",
    "# isin() is a single vectorized membership test, so unique() is no longer\n",
    "# recomputed for every row. NOTE: isin() matches NaN against NaN, which the\n",
    "# original `in` test did not -- only relevant if both columns hold NaN.\n",
    "drop_idx = df_bat.index[df_bat['技术主题1'].isin(t_i['技术主题2'])].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "d8e668d2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "177"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(drop_idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "a24bcecc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1444, 14)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bat.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "87429d99",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep every df_bat row that was not flagged in drop_idx.\n",
    "three = df_bat.drop(labels=drop_idx, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "6cd9e2b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>基础主题</th>\n",
       "      <th>年份1</th>\n",
       "      <th>应用主题1</th>\n",
       "      <th>年份2</th>\n",
       "      <th>绝对值1</th>\n",
       "      <th>年份差1</th>\n",
       "      <th>weight1</th>\n",
       "      <th>应用主题2</th>\n",
       "      <th>年份2</th>\n",
       "      <th>技术主题1</th>\n",
       "      <th>年份3</th>\n",
       "      <th>绝对值2</th>\n",
       "      <th>年份差2</th>\n",
       "      <th>weight2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>21.032535</td>\n",
       "      <td>4.714</td>\n",
       "      <td>0.001621</td>\n",
       "      <td>A20</td>\n",
       "      <td>2016.079</td>\n",
       "      <td>T4</td>\n",
       "      <td>2016.927</td>\n",
       "      <td>103.582626</td>\n",
       "      <td>0.848</td>\n",
       "      <td>0.007983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>30.296361</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>A1</td>\n",
       "      <td>2011.062</td>\n",
       "      <td>T31</td>\n",
       "      <td>2011.698</td>\n",
       "      <td>89.223866</td>\n",
       "      <td>0.636</td>\n",
       "      <td>0.006876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>B5</td>\n",
       "      <td>2015.128</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>69.033516</td>\n",
       "      <td>0.309</td>\n",
       "      <td>0.005320</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>T27</td>\n",
       "      <td>2015.581</td>\n",
       "      <td>66.636650</td>\n",
       "      <td>0.144</td>\n",
       "      <td>0.005135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>B26</td>\n",
       "      <td>2013.829</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>60.843124</td>\n",
       "      <td>1.608</td>\n",
       "      <td>0.004689</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>T27</td>\n",
       "      <td>2015.581</td>\n",
       "      <td>66.636650</td>\n",
       "      <td>0.144</td>\n",
       "      <td>0.005135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>B22</td>\n",
       "      <td>2015.042</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>31.637615</td>\n",
       "      <td>0.395</td>\n",
       "      <td>0.002438</td>\n",
       "      <td>A11</td>\n",
       "      <td>2015.437</td>\n",
       "      <td>T27</td>\n",
       "      <td>2015.581</td>\n",
       "      <td>66.636650</td>\n",
       "      <td>0.144</td>\n",
       "      <td>0.005135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1439</th>\n",
       "      <td>B13</td>\n",
       "      <td>2014.964</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>24.760734</td>\n",
       "      <td>3.843</td>\n",
       "      <td>0.001908</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1440</th>\n",
       "      <td>B1</td>\n",
       "      <td>2014.468</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.985880</td>\n",
       "      <td>4.339</td>\n",
       "      <td>0.001848</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441</th>\n",
       "      <td>B0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>23.706140</td>\n",
       "      <td>4.715</td>\n",
       "      <td>0.001827</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1442</th>\n",
       "      <td>B11</td>\n",
       "      <td>2016.364</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.993360</td>\n",
       "      <td>2.443</td>\n",
       "      <td>0.001695</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1443</th>\n",
       "      <td>B8</td>\n",
       "      <td>2013.253</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>21.071649</td>\n",
       "      <td>5.554</td>\n",
       "      <td>0.001624</td>\n",
       "      <td>A19</td>\n",
       "      <td>2018.807</td>\n",
       "      <td>T16</td>\n",
       "      <td>2019.263</td>\n",
       "      <td>22.472464</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.001732</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1267 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     基础主题       年份1 应用主题1       年份2       绝对值1   年份差1   weight1 应用主题2  \\\n",
       "0     B21  2011.365   A20  2016.079  21.032535  4.714  0.001621   A20   \n",
       "1     B25  2010.477    A1  2011.062  30.296361  0.585  0.002335    A1   \n",
       "7      B5  2015.128   A11  2015.437  69.033516  0.309  0.005320   A11   \n",
       "8     B26  2013.829   A11  2015.437  60.843124  1.608  0.004689   A11   \n",
       "9     B22  2015.042   A11  2015.437  31.637615  0.395  0.002438   A11   \n",
       "...   ...       ...   ...       ...        ...    ...       ...   ...   \n",
       "1439  B13  2014.964   A19  2018.807  24.760734  3.843  0.001908   A19   \n",
       "1440   B1  2014.468   A19  2018.807  23.985880  4.339  0.001848   A19   \n",
       "1441   B0  2014.092   A19  2018.807  23.706140  4.715  0.001827   A19   \n",
       "1442  B11  2016.364   A19  2018.807  21.993360  2.443  0.001695   A19   \n",
       "1443   B8  2013.253   A19  2018.807  21.071649  5.554  0.001624   A19   \n",
       "\n",
       "           年份2 技术主题1       年份3        绝对值2   年份差2   weight2  \n",
       "0     2016.079    T4  2016.927  103.582626  0.848  0.007983  \n",
       "1     2011.062   T31  2011.698   89.223866  0.636  0.006876  \n",
       "7     2015.437   T27  2015.581   66.636650  0.144  0.005135  \n",
       "8     2015.437   T27  2015.581   66.636650  0.144  0.005135  \n",
       "9     2015.437   T27  2015.581   66.636650  0.144  0.005135  \n",
       "...        ...   ...       ...         ...    ...       ...  \n",
       "1439  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1440  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1441  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1442  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "1443  2018.807   T16  2019.263   22.472464  0.456  0.001732  \n",
       "\n",
       "[1267 rows x 14 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "three"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "4a608e1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the filtered frame to Excel, leaving out the index column.\n",
    "three.to_excel(\n",
    "    excel_writer=r'F:\\work\\2023.08.26DK提取路径\\相关数据\\思路2\\三阶段路径_BAT.xlsx',\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6e487d06",
   "metadata": {},
   "source": [
    "# 判断两阶段\n",
    "- 应用主题1 不在 a_t 的应用主题2 中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "86d4a199",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['A20', 'A1', 'A25', 'A6', 'A11', 'A14', 'A5', 'A27', 'A0', 'A16',\n",
       "       'A12', 'A17', 'A13', 'A4', 'A2', 'A8', 'A3', 'A10', 'A21', 'A19',\n",
       "       'A26', 'A18'], dtype=object)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a_t['应用主题2'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "f3548157",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flag rows of b_a whose 应用主题1 also occurs in a_t['应用主题2'].\n",
    "# Index labels are collected (not enumerate() positions) because\n",
    "# DataFrame.drop(index=...) matches labels; positions only coincide with\n",
    "# labels while b_a keeps a default 0..n-1 index.\n",
    "# isin() is a single vectorized membership test, so unique() is no longer\n",
    "# recomputed for every row. NOTE: isin() matches NaN against NaN, which the\n",
    "# original `in` test did not -- only relevant if both columns hold NaN.\n",
    "drop_idx = b_a.index[b_a['应用主题1'].isin(a_t['应用主题2'])].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "60a6788d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep every b_a row that was not flagged in drop_idx.\n",
    "two = b_a.drop(labels=drop_idx, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "ea5b283b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(58, 7)"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "two.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "177861b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the filtered frame to Excel, leaving out the index column.\n",
    "two.to_excel(\n",
    "    excel_writer=r'F:\\work\\2023.08.26DK提取路径\\相关数据\\思路2\\两阶段路径_BA.xlsx',\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3041dc25",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34ddfdc2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19295470",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
